
#define __STDC_FORMAT_MACROS
#include <assert.h>
#include <getopt.h>
#include <inttypes.h>
#include <libgen.h>
#include <map>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>

#include "BaseCallingProfile.h"
#include "GCBiasProfile.h"
#include "IndelProfile.h"
#include "MaskQvalsByEamss.h"
#include "SimulationParameters.h"

#include "InputStream.h"
#include "OutputStream.h"

#include "Read.h"

#include "util.h"
#include "pirs.h"

#ifdef ENABLE_THREADS
#include <omp.h>
#include "SharedQueue.h"
#endif

using std::vector;
using std::map;
using std::max;

extern bool read_scaffold(InputStream &in, string &id, vector<char> &seq);

class SimulationProfiles {
public:
	const GCBiasProfile *gc_bias_profile;
	const BaseCallingProfile *base_calling_profile;
	const IndelProfile *indel_profile;

	SimulationProfiles(const SimulationParameters &params);
	~SimulationProfiles();
};

class SimulationCounters {
private:
	unsigned read_len;
public:
	uint64_t *error_pos_distr;
	map<int, uint64_t> insert_len_distr;
	uint64_t ins_count;
	uint64_t ins_sum;
	uint64_t del_count;
	uint64_t del_sum;
	uint64_t num_inserts;
	uint64_t discarded_inserts;
	uint64_t masked_bases;

	void init() {
		ins_count         = 0;
		ins_sum           = 0;
		del_count         = 0;
		del_sum           = 0;
		num_inserts       = 0;
		discarded_inserts = 0;
		masked_bases      = 0;
	}
	void init(unsigned _read_len) {
		init();
	        read_len = _read_len;
		error_pos_distr = new uint64_t[2 * read_len];
		memset(error_pos_distr, 0, 2 * read_len * sizeof(error_pos_distr[0]));
	}

	SimulationCounters(unsigned _read_len) {
		init(_read_len);
	}

	SimulationCounters() :
		error_pos_distr(NULL) 
	{ }

	~SimulationCounters() {
		delete error_pos_distr;
	}
	void add(SimulationCounters &other) {
		for (unsigned i = 0; i < 2 * read_len; i++)
			error_pos_distr[i] += other.error_pos_distr[i];
		map<int, uint64_t>::const_iterator it;
		for (it = other.insert_len_distr.begin(); it != other.insert_len_distr.end(); it++)
			insert_len_distr[it->first] += it->second;

		ins_count         += other.ins_count;
		ins_sum           += other.ins_sum;
		del_count         += other.del_count;
		del_sum           += other.del_sum;
		num_inserts       += other.num_inserts;
		discarded_inserts += other.discarded_inserts;
		masked_bases      += other.masked_bases;
	}
};

static void pirs_simulate_usage()
{
	const char *usage_str = 
"Usage: ./pirs simulate [OPTION]... REFERENCE.FASTA...\n"
"\n"
"pIRS is a program for simulating paired-end reads from a genome.  It is\n"
"optimized for simulating reads from the Illumina platform.  The input to\n"
"pIRS is any number of reference sequences. Typically you would just provide\n"
"one FASTA file containing your reference sequence, but you may provide two\n"
"if you have generated a diploid sequence with `pirs diploid', or if you have\n"
"chromosomes split up into multiple FASTA files.  The output of pIRS is two\n"
"FASTQ files containing the simulated paired-end reads, as well as several log\n"
"files.\n"
"\n"
"Input sequences are assumed to be haploid.  If you instead want to simulate\n"
"reads from a diploid genome, you must give the --diploid option so that\n"
"the diploidy is taken into account when computing coverage.  If you do\n"
"not do this, you will get twice as many reads as you wanted.\n"
"\n"
"pIRS simulates a normally-distributed insert (fragment) length using the\n"
"Box-muller method.  Usually you want the insert length standard deviation to\n"
"be 1/20 or 1/10 of the insert length mean (see the -m and -v options).\n"
"This program also simulates Illumina sequencing error, quality score and\n"
"GC bias based on empirical distribution profiles. Users may use the default\n"
"profiles in this package, which are generated by large real sequencing data,\n"
"or they may generate their own profiles.\n"
"\n"
"OPTIONS:\n"
"  -l LEN, --read-len=LEN\n"
"                Generate reads having a length of LEN.  Default: 100\n"
"\n"
"  -x VAL, --coverage=VAL\n"
"                 Set the average sequencing coverage (sometimes called depth).\n"
"                 It may be either a floating-point number or an integer.\n"
"\n"
"  -m LEN, --insert-len-mean=LEN\n"
"                 Generate inserts (fragments) having an average length of LEN.\n"
"                 Default: 180\n"
"\n"
"  -v LEN, --insert-len-sd=LEN\n"
"                 Set the standard deviation of the insert (fragment) length.\n"
"                 Default: 10% of insert length mean.\n"
"\n"
"  -j, --jumping, --cyclicize\n"
"                 Make the paired-end reads face away from either other, as\n"
"                 in a jumping library.  Default: the reads face towards each\n"
"                 other.\n"
"\n"
"  -d, --diploid\n"
"                 This option asserts that reads are being simulated from a\n"
"                 diploid genome.  It causes the program to abort if there\n"
"                 are not exactly two reference sequences; in addition, the\n"
"                 coverage is divided in half, since the two reference\n"
"                 sequences are in reality the same genome.  This option\n"
"                 is not required to simulate diploid reads, but you must\n"
"                 set the coverage correctly otherwise (it will be half\n"
"                 as much as you think).\n"
"\n"
"  -B FILE, --base-calling-profile=FILE, --subst-error-profile=FILE\n"
"                 Use FILE as the base-calling profile.  This profile will be\n"
"                 used to simulate substitution errors.  Default:\n"
"                 "DEFAULT_BASE_CALLING_PROFILE"\n"
"\n"
"  -I FILE, --indel-error-profile=FILE, --indel-profile=FILE\n"
"                 Use FILE as the indel-error profile.  This profile will be\n"
"                 used to simulate insertions and deletions in the reads that\n"
"                 are artifacts of the sequencing process.  Default:\n"
"                 "DEFAULT_INDEL_PROFILE"\n"
"\n"
"  -G FILE, --gc-bias-profile=FILE, --gc-content-bias-profile=FILE\n"
"                 Use FILE as the GC content bias profile.  This profile will\n"
"                 adjust the read coverage based on the GC content of\n"
"                 fragments.  Defaults:\n"
"                 "DEFAULT_GC_BIAS_PROFILE_100",\n"
"                 "DEFAULT_GC_BIAS_PROFILE_150",\n"
"                 "DEFAULT_GC_BIAS_PROFILE_200",\n"
"                 depending on the mean insert length.\n"
"\n"
"  -e FILE, --error-rate=RATE, --subst-error-rate=RATE\n"
"                 Set the substitution error rate.  The base-calling profile\n"
"                 will still be used, but the average frequency of errors will\n"
"                 be changed to RATE.  Set to 0 to disable substitution errors\n"
"                 completely.  In that case, the base-calling profile will not\n"
"                 be used.  Default: default error rate of base-calling\n"
"                 profile.\n"
"\n"
"                 Note: since pIRS parameterizes the error rate by\n"
"                 several parameters, it is very difficult to determine exactly\n"
"                 what needs to be done to make the error rate be a given\n"
"                 value.  We try to adjust the probabilities of getting each\n"
"                 quality score in order to accomodate the user-supplied error\n"
"                 rate.  However, depending on your input sequences, the actual\n"
"                 error rate simulated by pIRS could be off by 20% or more.\n"
"                 Please check the informational output to see the final error\n"
"                 rate that was actually simulated.\n"
"\n"
"  -A ALGO, --substitution-error-algorithm=ALGO, --subst-error-algo=ALGO\n"
"                 Set the algorithm used for simulating substitition errors.\n"
"                 It may be set to the string \"dist\" or \"qtrans\".  The\n"
"                 default is \"qtrans\".\n"
"\n"
"                 The \"dist\" algorithm looks up the substitution error rate\n"
"                 for each base pair based on the current cycle and the true\n"
"                 base.  This lookup produces a quality score and a called base\n"
"                 that may or may not be the same as the true base.  In the\n"
"                 base-calling profile, the matrix we use is marked as the\n"
"                 [DistMatrix].\n"
"\n"
"                 The \"qtrans\" algorithm is a Markov-chain model based on the\n"
"                 previous quality score and current cycle.  The next quality\n"
"                 score is looked up with a certain probability based on these\n"
"                 parameters.  The matrix used for this is marked as\n"
"                 [QTransMatrix] in the base-calling profile. Then, the the\n"
"                 DistMatrix is used to find a called base for the quality score.\n"
"                 The DistMatrix is also used to call the base in the first\n"
"                 cycle.\n"
"\n"
"  -M MODE, --mask=MODE, --eamss=MODE\n"
"                 Use the EAMSS algorithm for masking read quality.  MODE may be\n"
"                 the string \"quality\" or \"lowercase\".  The EAMSS algorithm\n"
"                 identifies low-quality, GC-rich regions near the ends of reads.\n"
"                 \"quality\" mode will change the quality scores on these\n"
"                 regions to (2 + quality_shift), while \"lowercase\" mode\n"
"                 will change the base pairs to lower case, but not change\n"
"                 the quality values.  Default: Do not use EAMSS.\n"
"\n"
"  -Q VAL, --quality-shift=VAL, --phred-offset=VAL\n"
"                 Set the ASCII shift of the quality value (usually 64 or 33 for\n"
"                 Illumina data).  Default: 33\n"
"\n"
"  --no-quality-values\n"
"  --fasta\n"
"                 Do not simulate quality values.  The simulated reads will be\n"
"                 written as a FASTA file rather than a FASTQ file.\n"
"                 Substitution errors may still be done; if you do not want\n"
"                 to simulate any substition errors, provide --error-rate=0 or\n"
"                 --no-substitution-errors.\n"
"\n"
"  --no-subst-errors\n"
"  --no-substitution-errors\n"
"                 Do not simulate substitution errors.  Equivalent to\n"
"                 --error-rate=0.\n"
"\n"
"  --no-indels\n"
"  --no-indel-errors\n"
"                 Do not simulate indels.  The indel error profile will not be\n"
"                 used.\n"
"\n"
"  --no-gc-bias\n"
"  --no-gc-content-bias\n"
"                 Do not simulate GC bias.  The GC bias profile will not be\n"
"                 used.\n"
"\n"
"  -o PREFIX, --output-prefix=PREFIX\n"
"                 Use PREFIX as the prefix of the output files.  Default:\n"
"                 \"pirs_reads\"\n"
"\n"
"  -c TYPE, --output-file-type=TYPE\n"
"                 The string \"text\" or \"gzip\" to specify the type of\n"
"                 the output FASTQ files containing the simulated reads\n"
"                 of the genome, as well as the log files.  Default: \"text\"\n"
"\n"
"  -z, --compress\n"
"                 Equivalent to -c gzip.\n"
"\n"
"  -n, --no-logs, --no-log-files\n"
"                 Do not write the log files.\n"
"\n"
"  -S SEED, --random-seed=SEED\n"
"                 Use SEED as the random seed. Default:\n"
"                 time(NULL) * getpid().  Note: If pIRS was not compiled with\n"
"                 --disable-threads, each thread actually uses its own random\n"
"                 number generator that is seeded by this base seed added to\n"
"                 the thread number; also, if you need pIRS's output to be\n"
"                 exactly reproducible, you must specify the random seed as well\n"
"                 as use only 1 simulator thread (--threads=1, or configure\n"
"                 with --disable-threads, or run on system with 4 or fewer\n"
"                 processors).\n"
"\n"
"  -t, --threads=NUM_THREADS\n"
"                 Use NUM_THREADS threads to simulate reads.  This option is\n"
"                 not available if pIRS was compiled with the --disable-threads\n"
"                 option.  Default: number of processors minus 2 if writing\n"
"                 uncompressed output, or number of processors minus 3 if\n"
"                 writing compressed output, or 1 if there are not this many\n"
"                 processors.\n"
"\n"
"  -q, --quiet    Do not print informational messages.\n"
"\n"
"  -h, --help     Show this help and exit.\n"
"\n"
"  -V, --version  Show version information and exit."
	;
	puts(usage_str);
}

static void pirs_simulate_usage_short()
{
	const char *usage_str_short = 
"Usage: pirs simulate [OPTIONS...] REFERENCE.FASTA...\n"
"Options:\n"
"  -l LEN     Set read length\n"
"  -x COV     Set coverage\n"
"  -m MEAN    Set insert length mean\n"
"  -v STDDEV  Set insert length standard deviation\n"
"  -j         Simulate jumping library\n"
"  -d         Simulate from diploid genome produced by `pirs diploid'\n"
"  -o PREFIX  Set output prefix\n"
"  -h         Show detailed help\n"
"Not all options are shown here.  Try `pirs simulate -h' for the full help."
	;
	puts(usage_str_short);
}

enum {
	OPTION_NO_QUALITY_VALUES = 1001,
	OPTION_NO_SUBSTITUTION_ERRORS,
	OPTION_NO_INDELS,
	OPTION_NO_GC_BIAS,
};
static const char *optstring = "l:x:m:v:jdB:I:G:e:A:M:Q:o:c:znS:t:qhV";
static const struct option longopts[] = {
	{"read-len",                    required_argument, NULL, 'l'},
	{"coverage",                    required_argument, NULL, 'x'},
	{"insert-len-mean",             required_argument, NULL, 'm'},
	{"insert-len-sd",               required_argument, NULL, 'v'},
	{"jumping",                     no_argument,       NULL, 'j'},
	{"diploid",                     no_argument,       NULL, 'd'},
	{"cyclicize",                   no_argument,       NULL, 'j'},
	{"base-calling-profile",        required_argument, NULL, 'B'},
	{"subst-error-profile",         required_argument, NULL, 'B'},
	{"indel-profile",               required_argument, NULL, 'I'},
	{"indel-error-profile",         required_argument, NULL, 'I'},
	{"gc-bias-profile",             required_argument, NULL, 'G'},
	{"gc-content-bias-profile",     required_argument, NULL, 'G'},
	{"error-rate",                  required_argument, NULL, 'e'},
	{"subst-error-rate",            required_argument, NULL, 'e'},
	{"substitition-error-algorithm",required_argument, NULL, 'A'},
	{"subst-error-algo",            required_argument, NULL, 'A'},
	{"mask",                        required_argument, NULL, 'M'},
	{"eamss",                       required_argument, NULL, 'M'},
	{"quality-shift",               required_argument, NULL, 'Q'},
	{"phred-offset",                required_argument, NULL, 'Q'},
	{"no-quality-values",           no_argument,       NULL, OPTION_NO_QUALITY_VALUES},
	{"fasta",                       no_argument,       NULL, OPTION_NO_QUALITY_VALUES},
	{"no-subst-errors",             no_argument,       NULL, OPTION_NO_SUBSTITUTION_ERRORS},
	{"no-substitution-errors",      no_argument,       NULL, OPTION_NO_SUBSTITUTION_ERRORS},
	{"no-indels",                   no_argument,       NULL, OPTION_NO_INDELS},
	{"no-indel-errors",             no_argument,       NULL, OPTION_NO_INDELS},
	{"no-gc-bias",                  no_argument,       NULL, OPTION_NO_GC_BIAS},
	{"no-gc-content-bias",          no_argument,       NULL, OPTION_NO_GC_BIAS},
	{"output-file-type",		required_argument, NULL, 'c'},
	{"compress",			no_argument,       NULL, 'z'},
	{"output-prefix",		required_argument, NULL, 'o'},
	{"no-logs",			no_argument,       NULL, 'n'},
	{"no-log-files",		no_argument,       NULL, 'n'},
	{"random-seed",		 	required_argument, NULL, 'S'},
	{"threads",			required_argument, NULL, 't'},
	{"quiet",		 	no_argument, 	   NULL, 'q'},
	{"help",			no_argument,	   NULL, 'h'},
	{"version",			no_argument,	   NULL, 'V'},
	{NULL, 0, NULL, 0}
};

/* From the command line, construct a SimulationParameters object containing the
 * parameters for the simulation. */
SimulationParameters::SimulationParameters(int argc, char *argv[])
	:  
	   read_len			(100),
	   coverage			(5.0),
	   insert_len_mean		(180),
	   insert_len_sd		(-1.0),
	   jumping			(false),
	   diploid			(false),
	   base_calling_profile_filename(NULL),
	   gc_bias_profile_filename	(NULL),
	   indel_profile_filename	(NULL),
	   error_rate			(-1.0),
	   subst_error_algo		(ALGO_QTRANS),
	   quality_mask_mode		(casava::demultiplex::MODE_NONE),
	   quality_shift		(33),
	   simulate_quality_values	(true),
	   simulate_substitution_errors (true),
	   simulate_indels		(true),
	   simulate_gc_bias		(true),
	   write_log_files		(true),
	   user_specified_random_seed   (false),
	   num_simulator_threads	(-1),
	   output_prefix		("pirs_reads")
{
	argc--;
	argv++;
	int c;
	char *tmp;
	while ((c = getopt_long(argc, argv, optstring, longopts, NULL)) != -1) {
		switch (c) {
		case 'l': 
			read_len = strtol(optarg, &tmp, 10);
			if (tmp == optarg || read_len <= 0) {
				fatal_error("The read length must be greater "
					    "than 0; please check option -l.");
			}
			break;
		case 'x': 
			coverage = strtod(optarg, &tmp);
			if (tmp == optarg || coverage <= 0.0) {
				fatal_error("The coverage must be greater "
					    "than 0; please check option -x.");
			}
			break;
		case 'm': 
			insert_len_mean = strtod(optarg, &tmp);
			if (tmp == optarg || insert_len_mean <= 0.0) {
				fatal_error("The insert size mean be greater "
					    "than 0; please check option -m.");
			}
			break;
		case 'v': 
			insert_len_sd = strtod(optarg, &tmp);
			if (tmp == optarg || insert_len_sd < 0.0) {
				fatal_error("The insert size standard deviation "
					    "must be non-negative; please check "
					    "option -v");
			}
			break;
		case 'j':
			jumping = true;
			break;
		case 'd':
			diploid = true;
			break;
		case 'B':
			base_calling_profile_filename = optarg;
			break;
		case 'I':
			indel_profile_filename = optarg;
			break;
		case 'G':
			gc_bias_profile_filename = optarg;
			break;
		case 'e': 
			error_rate = strtod(optarg, &tmp);

			if (tmp == optarg || !in_unit_interval(error_rate)) {
				fatal_error("The error rate must be a number in the interval "
					    "[0, 1]; please check option -e.");
			}
			if (error_rate == 0.0)
				simulate_substitution_errors = false;
			break;
		case 'A':
			if (strcmp(optarg, "qtrans") == 0)
				subst_error_algo = ALGO_QTRANS;
			else if (strcmp(optarg, "dist") == 0)
				subst_error_algo = ALGO_DIST;
			else
				fatal_error("Unknown algorithm \"%s\"; please "
					    "check option -A.\n"
					    "The available substitution error "
					    "simulation algorithms are "
					    "\"dist\" and \"qtrans\".", optarg);
			break;
		case 'M':
			if (strcmp(optarg, "quality") == 0)
				quality_mask_mode = casava::demultiplex::MODE_QUALITY;
			else if (strcmp(optarg, "lowercase") == 0)
				quality_mask_mode = casava::demultiplex::MODE_LOWERCASE;
			else
				fatal_error("Unknown mask quality mode \"%s\"; please "
					    "check option -M.\n"
					    "The available mask quality modes are "
					    "\"lowercase\" and \"quality\".",
					    optarg);
			break;
		case 'Q': 
			quality_shift = strtol(optarg, &tmp, 10);
			if (tmp == optarg || quality_shift < 0 || quality_shift > 127) {
				fatal_error("Expected quality_shift between 0 "
					    "and 127!  Please check option -Q.");
			}
			if (quality_shift != 33 && quality_shift != 64) {
				warning("quality_shift is set to %d, but it "
					"usually is 33 or 64!", quality_shift);
			}
			break;
		case OPTION_NO_QUALITY_VALUES:
			simulate_quality_values = false;
			break;
		case OPTION_NO_SUBSTITUTION_ERRORS:
			simulate_substitution_errors = false;
			error_rate = 0.0;
			break;
		case OPTION_NO_INDELS:
			simulate_indels = false;
			break;
		case OPTION_NO_GC_BIAS:
			simulate_gc_bias = false;
			break;
		case 'o': 
			output_prefix = optarg;
			break;
		case 'c': 
			OutputStream::set_default_output_type(optarg);
			break;
		case 'z':
			OutputStream::set_default_output_type(GZIP);
			break;
		case 'n':
			write_log_files = false;
			break;
		case 't':
			#ifdef ENABLE_THREADS
			num_simulator_threads = strtol(optarg, &tmp, 10);
			if (tmp == optarg || num_simulator_threads <= 0) {
				fatal_error("The number of threads must be "
					    "a positive integer!  Please "
					    "check option -t.");
			}
			#else
			fatal_error("pIRS is not compiled with support for "
					"threads!  Option -t is not allowed.");
			#endif
			break;
		case 'S': {
				random_seed = strtoull(optarg, &tmp, 10);
				if (tmp == optarg)
					fatal_error("The random seed must be "
						    "an integer!  Please check "
						    "option -S.");
				user_specified_random_seed = true;
			}
			break;
		case 'q':
			info_messages_fp = NULL;
			break;
		case 'h': 
			pirs_simulate_usage();
			exit(0);
		case 'V':
			program_info();
			exit(0);
		default: 
			pirs_simulate_usage_short();
			exit(1);
		}
	}

	argc -= optind;
	argv += optind;
	input_refs = const_cast<const char**>(argv);
	num_input_refs = argc;

	if (num_input_refs == 0) {
		pirs_simulate_usage_short();
		fatal_error("No reference sequences supplied!");
	}
	if (diploid && num_input_refs != 2) {
		fatal_error("The `--diploid' option was given, but exactly 2 "
			     "reference sequences were not supplied.");
	}

	if (insert_len_sd == -1.0)
		insert_len_sd = insert_len_mean * 0.10;

	if ((int)insert_len_mean < read_len)
		fatal_error("The insert size mean cannot be less than the "
			    "read length; please check option -m.");

	if (!user_specified_random_seed)
		random_seed = (uint64_t)time(NULL) * (uint64_t)getpid();
#ifdef ENABLE_THREADS
	if (num_simulator_threads == -1) {
		int nprocs = omp_get_num_procs();
		if (OutputStream::default_output_type_is_compressed())
			num_simulator_threads = nprocs - 3;
		else
			num_simulator_threads = nprocs - 2;

		if (num_simulator_threads < 1)
			num_simulator_threads = 1;
	}
#endif
}

static const char *get_subst_error_algo_name(enum SubstitutionErrorAlgorithm algo)
{
	switch (algo) {
	case ALGO_DIST:
		return "dist matrix algorithm";
	case ALGO_QTRANS:
		return "quality transition mode algorithm";
	default:
		assert(0);
	}
}

/*
 * Open the input files, output FASTQ/FASTA files, and log files.
 */
SimulationFiles::SimulationFiles(const SimulationParameters &params)
{
	char buf[params.output_prefix.length() + 50];
	sprintf(buf, "%s_%d_%d", params.output_prefix.c_str(),
		(int)params.read_len, (int)params.insert_len_mean);
	string prefix_long(buf);
	const char *fasta_suffix = (params.simulate_quality_values) ? ".fq" : ".fa";

	in_files = new InputStream[params.num_input_refs];
	for (unsigned i = 0; i < params.num_input_refs; i++)
		in_files[i].open(params.input_refs[i]);

	out_file_1.open(prefix_long + "_1" + fasta_suffix);
	out_file_2.open(prefix_long + "_2" + fasta_suffix);
	if (params.write_log_files) {
		insert_distr_log_file.open(prefix_long + ".insert_len.distr");
		insert_distr_log_file.write_header();
		if (params.error_rate != 0.0) {
			error_distr_log_file.open(prefix_long + ".error_rate.distr");
			error_distr_log_file.write_header();
		}
		info_log_file.open(prefix_long + ".read.info");
		info_log_file.write_header();
	}
}


/* Open and read into memory all profile files needed for the simulation. */
SimulationProfiles::SimulationProfiles(const SimulationParameters &params)
{
	if (params.simulate_substitution_errors)
		base_calling_profile = new BaseCallingProfile(params);
	else
		base_calling_profile = NULL;

	if (params.simulate_indels)
		indel_profile = new IndelProfile(params);
	else
		indel_profile = NULL;

	if (params.simulate_gc_bias)
		gc_bias_profile = new GCBiasProfile(params);
	else
		gc_bias_profile = NULL;
}

/* Delete the in-memory profiles that were used for the simulation. */
SimulationProfiles::~SimulationProfiles()
{
	delete base_calling_profile;
	delete indel_profile;
	delete gc_bias_profile;
}

/* 
 * Writes a read to an output FASTA or FASTQ file.
 *
 * We know whether it's FASTA or FASTQ based on whether the read.quality_vals()
 * vector is empty or not.
 *
 * The number of the read in the pair (1 or 2) is figured out based on whether
 * this is read 1 or read 2 in the pair of which the read is a member, an
 * whether the reverse order flag is marked on the read pair.
 */
static void output_read(const Read &read, OutputStream &out_file)
{
	out_file.printf("%cread_%d_%"PRIu64"/%d\n",
		        (read.quality_vals.empty()) ? '>' : '@',
			read.pair.insert_len_mean,
			read.pair.pair_number, read.num_in_pair());
	out_file.write(&read.seq[0], read.seq.size());
	out_file.putc('\n');

	if (!read.quality_vals.empty()) {
		char ascii_quality_vals[2 + read.seq.size() + 1];
		char *p = ascii_quality_vals;
		*p++ = '+';
		*p++ = '\n';
		for (size_t i = 0; i < read.seq.size(); i++)
			*p++ = read.quality_vals[i] + read.pair.quality_shift;
		*p++ = '\n';
		out_file.write(ascii_quality_vals, sizeof(ascii_quality_vals));
	}
}

/*
 * Log some information about a read to the read info log file
 * (PREFIX.read_info{,.gz})
 */
static void log_read_info(const Read &read, OutputStream &info_log_file)
{
	if (!info_log_file.is_open())
		return;

	info_log_file.printf("read_%d_%"PRIu64"/%d\t%s\t%s\t%zu\t%c\t%d\t%d\t",
			     (int)read.pair.insert_len_mean, read.pair.pair_number,
			     read.num_in_pair(), read.pair.ref_filename,
			     read.pair.ref_seq_id, read.pair.ref_seq_pos,
			     read.orientation(),
			     read.pair.insert_len, read.mask_end_len);

	// Log base substitutions.
	if (read.error_pos.empty()) {
		info_log_file.putc('-');
	} else {
		vector<int>::const_iterator it = read.error_pos.begin();
		do {
			info_log_file.printf("%d,%c->%c;",
						*it + 1,
						read.raw_read[*it],
						read.seq[*it]);
		} while (++it != read.error_pos.end());
	}
	info_log_file.putc('\t');
	
	// Log insertions
	bool found;
	vector<Indel>::const_iterator it;
	int idx_delta = 0; // Change from old to new index
	found = false;
	for (it = read.indels.begin(); it != read.indels.end(); it++) {
		if (it->len > 0)  {
			// Look in the read.seq, the output base calls, to find
			// the insertion.
			info_log_file.printf("%d,", it->ref_idx);
			info_log_file.write(&read.seq[it->ref_idx + idx_delta],
					    it->len);
			info_log_file.putc(';');
			found = true;
		}
		idx_delta += it->len;
	}
	if (!found)
		info_log_file.putc('-');
	info_log_file.putc('\t');

	// Log deletions
	found = false;
	for (it = read.indels.begin(); it != read.indels.end(); it++) {
		if (it->len < 0)  {
			// Look in the read.ref_read, the reference read, to
			// find the deletion.
			info_log_file.printf("%d,", it->ref_idx);
			info_log_file.write(&read.ref_read[it->ref_idx], -it->len);
			info_log_file.putc(';');
			found = true;
		}
	}
	if (!found)
		info_log_file.putc('-');
	info_log_file.putc('\n');
}

/*
 * Given a read length and an indel error profile, construct a list of indels
 * that will be applied to a read of that length.
 *
 * @indels is an empty vector that will be filled in with Indel structures that
 * describe the location within the reference read and the length of each indel.
 *
 * The return value is the length of the part of the reference sequence that is
 * needed to generate the desired read.  The generated read is of length exactly
 * @read_len.  However, the part of the reference sequence from which it comes
 * may be less than @read_len base-pairs (in the case of insertions into the
 * read) or greater than @read_len base-pairs (in the case of deletions from the
 * read), and this length is what is returned.
 */
int build_indel_list(vector<Indel> &indels, int read_len,
		     const IndelProfile &indel_profile, Random &rgen)
{
	int ref_idx = 0;
	int raw_idx = 0;
	for (; raw_idx < read_len; ref_idx++, raw_idx++) {
		int indel_len = indel_profile.get_indel_len(raw_idx, rgen);
		if (indel_len != 0) {
			if (indel_len < 0) {
				 // deletion: advance ref_idx
				ref_idx += -indel_len;
			} else {
				// insertion: advance raw_idx

				// Truncate inserts that would overflow the read
				// length.
				if (raw_idx + indel_len > read_len)
					indel_len = read_len - raw_idx;
				raw_idx += indel_len;
			}
			// Record the insertion in the @indels array.
			indels.push_back(Indel(ref_idx, indel_len));
		}
	}
	return ref_idx;
}

/*
 * Applys a list of indels that was constructed by build_indel_list().  The
 * indels are applied to the ref_seq to create the raw_seq.
 */
void apply_indel_list(const vector<Indel> &indels, const vector<char> &ref_seq,
		      vector<char> &raw_seq, SimulationCounters &counters,
		      Random &rgen)
{
	int ref_idx = 0;
	int raw_idx = 0;
	vector<Indel>::const_iterator indel;
	for (indel = indels.begin(); indel != indels.end(); indel++) {
		// Advance forward to the location of the indel, copying the
		// corresponding reference sequence to the resulting raw_seq until
		// then.
		while (ref_idx < indel->ref_idx)
			raw_seq[raw_idx++] = ref_seq[ref_idx++];

		if (indel->len < 0) {
			// Deletion; skip over some of the reference sequence.
			ref_idx += -indel->len;
			counters.del_count++;
			counters.del_sum += -indel->len;
		} else {
			// Insertion; insert a random DNA sequence into the 
			// raw sequence.
			rgen.random_dna_seq(&raw_seq[raw_idx], indel->len);
			raw_idx += indel->len;
			counters.ins_count++;
			counters.ins_sum += indel->len;
		}
	}
	// No more indels.  Copy the rest of the reference sequence over, if
	// needed.
	while (raw_idx < (int)raw_seq.size())
		raw_seq[raw_idx++] = ref_seq[ref_idx++];
}

/*
 * Calls the bases in a read.
 *
 * We take the read.raw_seq and turn it into the read.seq by applying
 * substitution errors, as per the base-calling profile @profile.  This process
 * also generates the quality values vector read.quality_vals.
 */
static void call_read_bases(Read &read,
			    SimulationCounters &counters,
			    int cycle_offset,
			    const BaseCallingProfile &profile,
			    Random &rgen)
{
	// We need to keep track of whether the current position is an insert
	// error or not.  The inserts are just random bases, and it does not
	// make sense to simulate substitution error on them.
	vector<Indel>::const_iterator indel = read.indels.begin();

	int ref_idx = 0;
	int insert_remaining = 0;
	int qscore;
	int prev_qscore = 0;
	char raw_base;
	char called_base;
	for (size_t i = 0; i < read.seq.size(); i++,
					insert_remaining--, ref_idx++) 
	{
		int cycle = i + cycle_offset;
		if (indel != read.indels.end() && ref_idx == indel->ref_idx) {
			if (indel->len > 0)
				insert_remaining = indel->len; // insertion
			else
				ref_idx += -indel->ref_idx; // deletion
			indel++;
		}

		raw_base = read.raw_read[i];

		called_base = profile.call(cycle, raw_base, prev_qscore,
					   qscore, rgen);

		// If it's an insertion here, don't call a different base, but
		// keep the quality score given to us by the base calling
		// profile.
		if (insert_remaining > 0)
			called_base = raw_base;

		if (called_base != raw_base) {
			read.error_pos.push_back(i);
			counters.error_pos_distr[cycle - 1]++;
		}

		read.seq[i] = called_base;
		if (!read.quality_vals.empty())
			read.quality_vals[i] = qscore;
		prev_qscore = qscore;
	}
}


/*
 * Simulates a read pair.
 *
 * The read pair is simulated from the reference sequence @ref_seq of length
 * @ref_seq_len.  @any_non_dna_chars indicates whether the reference sequence
 * contains any unknown sequence.  The read pair is simulated based on the
 * parameters for the simulation (@params), such as the insert length and read
 * length, and on the profiles for the simulation (@profiles), including the
 * base-calling profile, indel-error profile, and GC-bias profile (depending on
 * the parameters, some profiles may not be used).
 *
 * Statistics about the simulated pair are added to counters within the
 * @counters structure.
 *
 * Returns %true if a read pair was successfully simulated, and %false if it was
 * not.  The function should be called repeatedly with the same parameters until
 * it returns %true, since the failure to produce a read pair may happen for
 * several reasons, all of which are random.
 */
static bool simulate_read_pair(ReadPair &pair,
			       const char *ref_seq, size_t ref_seq_len, 
			       bool any_non_dna_chars,
			       const SimulationParameters &params,
			       const SimulationProfiles &profiles,
			       SimulationCounters &counters,
			       Random &rgen)
{
	int insert_len = (int)rgen.rnorm(params.insert_len_mean,
					 params.insert_len_sd);
	pair.insert_len = insert_len;
	if (insert_len < params.read_len)
		return false;

	if ((size_t)insert_len > ref_seq_len)
		return false;

	pair.ref_seq_pos = rgen.next_uint64() % (ref_seq_len - insert_len);
	const char *insert = &ref_seq[pair.ref_seq_pos];
	int ref_read_1_len, ref_read_2_len;
	Read &read_1 = pair.read_1;
	Read &read_2 = pair.read_2;

	counters.num_inserts++;

	// Throw away this insert if the GC bias profile tells us to.
	if (params.simulate_gc_bias) {
		if (!profiles.gc_bias_profile->accept_insert(insert,
							     insert_len,
							     rgen)) 
		{
			counters.discarded_inserts++;
			return false;
		}
	}

	read_1.indels.clear();
	read_2.indels.clear();
	if (params.simulate_indels) {
		ref_read_1_len = build_indel_list(read_1.indels,
						  params.read_len,
						  *profiles.indel_profile,
						  rgen);


		ref_read_2_len = build_indel_list(read_2.indels,
						  params.read_len,
						  *profiles.indel_profile,
						  rgen);

		// If the insert length is very close to the read length, it is
		// possible that one of the read lengths is now greater than it
		// if there were deletions done.  This should be rare with a
		// reasonable read length and insert length... But if it does
		// happen, return false and try simulating a new read pair.
		if (ref_read_1_len > insert_len ||
					ref_read_2_len > insert_len)
			return false;
	} else {
		ref_read_1_len = params.read_len;
		ref_read_2_len = params.read_len;
	}

	read_1.ref_read.resize(ref_read_1_len);
	read_2.ref_read.resize(ref_read_2_len);

	if (params.jumping) {
		std::copy(&insert[insert_len - ref_read_2_len],
			  &insert[insert_len],
			  read_2.ref_read.begin());
		for (int i = 0; i < ref_read_1_len; i++)
			read_1.ref_read[i] = dna_char_complement(insert[ref_read_1_len - 1 - i]);
	} else {
		std::copy(&insert[0], &insert[ref_read_1_len],
		          read_1.ref_read.begin());
		for (int i = 0; i < ref_read_2_len; i++)
			read_2.ref_read[i] = dna_char_complement(insert[insert_len - 1 - i]);
	}

	// Check whether either read contains 'N' or any other nonbase
	// characters, and don't simulate reads from this place if this is the
	// case.
	if (any_non_dna_chars) {
		if (seq_contains_non_dna_chars(&read_1.ref_read[0], ref_read_1_len))
			return false;
		if (seq_contains_non_dna_chars(&read_2.ref_read[0], ref_read_2_len))
			return false;
	}

	
	// The 'raw' reads here refers to the reads produced after doing the
	// indel-simulation step but before doing the substitution-error step.
	read_1.raw_read.resize(params.read_len);
	read_2.raw_read.resize(params.read_len);

	apply_indel_list(read_1.indels, read_1.ref_read,
			 read_1.raw_read, counters, rgen);

	apply_indel_list(read_2.indels, read_2.ref_read,
			 read_2.raw_read, counters, rgen);

	if (params.simulate_quality_values) {
		read_1.quality_vals.resize(params.read_len);
		read_2.quality_vals.resize(params.read_len);
	} else {
		read_1.quality_vals.resize(0);
		read_2.quality_vals.resize(0);
	}
	read_1.error_pos.clear();
	read_2.error_pos.clear();
	read_1.seq.resize(params.read_len);
	read_2.seq.resize(params.read_len);
	if (params.simulate_substitution_errors) {
		call_read_bases(read_1, counters, 1,
				*profiles.base_calling_profile,
				rgen);

		call_read_bases(read_2, counters, params.read_len + 1,
				*profiles.base_calling_profile,
				rgen);

		// Mask quality using the EAMSS algorithm
		if (params.quality_mask_mode != casava::demultiplex::MODE_NONE) {
			casava::demultiplex::MaskQvalsByEamss masker;
			read_1.mask_end_len = masker(read_1.quality_vals,
						     read_1.seq,
						     params.quality_mask_mode);
			read_2.mask_end_len = masker(read_2.quality_vals,
						     read_2.seq,
						     params.quality_mask_mode);

			counters.masked_bases += read_1.mask_end_len +
						 read_2.mask_end_len;
		}
		
		//stat into quality_to_error_rate_distr
		//string combine_quality_seq = output_quality_seq1 + output_quality_seq2;
		//for(int i = 0; i < params.read_len * 2; i++)
		//{
			//int qval = combine_quality_seq[i] - params.quality_shift;
			//quality_to_error_rate_distr[i + 1] +=  pow(10, double(qval)/double(-10));
		//}
	} else {
		// Not simulating substitution errors in the reads, so the final
		// reads are the same as the raw reads.  We may, however, still
		// need to simulate quality values; but we just set the quality
		// values to the highest value in this case.
		std::copy(read_1.raw_read.begin(), read_1.raw_read.end(),
			  read_1.seq.begin());
		std::copy(read_2.raw_read.begin(), read_2.raw_read.end(),
			  read_2.seq.begin());
		if (params.simulate_quality_values) {
			read_1.quality_vals.assign(params.read_len, 40);
			read_2.quality_vals.assign(params.read_len, 40);
		}
	}

	// insert_len statistics
	counters.insert_len_distr[insert_len]++;
	return true;
}

/*
 * The following is compiled only if we are supporting the multi-threaded
 * version.  There are three types of threads: simulator threads (there may be
 * any number of these), read info writer threads (there is exactly one of
 * these), and read writer threads (there are exactly two of these).  Each type
 * of thread has its own function it executes until it exits.
 */
#ifdef ENABLE_THREADS

/*
 * This function is executed by the simulator threads.  Each simulator thread
 * repeatedly gets a ReadPairSet from the read_pair_free_queue, fills it up with
 * simulated read pairs, and puts it in the read_pair_ready_queue.
 *
 * Each simulator thread has its own random number generator and counters.
 *
 * We know when to stop by watching the pairs_remaining number.  We decrement
 * it, with the corresponding lock held, when we have received a read pair.  If
 * there are less than READS_PER_SET read pairs remaining to be simulated, we
 * only simulate the needed number, then mark the next pair as empty (the
 * ref_seq_id char pointer field is used for this) before putting it in the
 * read_pair_ready_queue and exiting the thread.
 */
static void read_simulator_thread_proc(const char *ref_seq,
				       size_t ref_seq_len,
				       bool any_non_dna_chars,
				       const SimulationParameters &params,
				       const SimulationProfiles &profiles,
				       SimulationCounters &counters,
				       SharedQueue<ReadPairSet*>& read_pair_free_queue,
				       SharedQueue<ReadPairSet*>& read_pair_ready_queue,
				       uint64_t &pairs_remaining,
				       Lock &pairs_remaining_lock,
				       Random &rgen)
{
	bool is_last = false;
	ReadPairSet *pair_set;
	unsigned pairs_to_simulate;
	do {
		pair_set = read_pair_free_queue.get();

		pairs_remaining_lock.lock();
		if (pairs_remaining < READS_PER_SET) {
			is_last = true;
			pairs_to_simulate = pairs_remaining;
			pairs_remaining = 0;
			pair_set->pairs[pairs_to_simulate].ref_seq_id = NULL;
		} else {
			pairs_remaining -= READS_PER_SET;
			pairs_to_simulate = READS_PER_SET;
		}
		pairs_remaining_lock.unlock();

		for (unsigned i = 0; i < pairs_to_simulate; i++) {
			while (!simulate_read_pair(pair_set->pairs[i],
						   ref_seq, ref_seq_len,
						   any_non_dna_chars, params,
						   profiles, counters, rgen))
				;
		}
		read_pair_ready_queue.put(pair_set);
	} while (!is_last);
}

/*
 * This function is executed by the read info writer thread, and there is only
 * one of this thread.  It repeatedly retrieves a set of simulated read pairs
 * from the read_pair_ready_queue and two unused read sets from the
 * read_1_free_queue and read_2_free_queue.  These two read sets are filled in
 * with the reads from the pair, and put into the read_1_ready_queue and
 * read_2_ready_queue for the writer threads to take.  Concurrently with the
 * writers writing the reads, the info writer thread writers an entry in the
 * read info log for the read pair.
 *
 * The read pair set therefore has three concurrent readers--- the read info
 * writer thread, and the two read writer threads.  It needs to be put back into
 * the read_pair_free_queue, but only after all three concurrent reads are done.
 * To solve this problem, a reference count is introduced into the read pair
 * set; it is initialized to 3, and upon reaching 0 references, it will be
 * placed back into the read_pair_free_queue.
 *
 * All the reads go through the info writer thread.  Therefore, it's able to
 * keep track of what number read we are on, and it knows when all the read
 * pairs for the sequence have been simulated.  The @seq_num_reads parameter is
 * the number of read pairs we must simulate, and the @read_pair_num_offset is
 * the number of read pairs that have been simulated from previous sequences.
 * The simulator threads, therefore, do not know what numbered reads they are
 * simulating (although they know when there are no more reads to simulate---
 * see the read_simulator_thread_proc()), as these numbers are assigned by the
 * info writer thread.
 *
 * One more thing needs to be considered, which is what happens at the end of
 * the simulation for this reference sequence.  Each simulator thread will send
 * us a partial read pair set (pair->ref_seq_id set to NULL on one of the pairs)
 * at some point.  However, we should not terminate and signal the writers until
 * the last of these partial read pair sets is received.  Luckily, this thread
 * is counting the reads anyway; just set a special flag on the read sets we
 * send to the writers when it's the last one.  (The writers must handle partial
 * read sets that are *not* the last one--- they will not have this special flag
 * set.)
 */
static void read_info_writer_thread_proc(SharedQueue<ReadPairSet*> &read_pair_free_queue,
					 SharedQueue<ReadPairSet*> &read_pair_ready_queue,
					 SharedQueue<ReadSet*> &read_1_free_queue,
					 SharedQueue<ReadSet*> &read_1_ready_queue,
					 SharedQueue<ReadSet*> &read_2_free_queue,
					 SharedQueue<ReadSet*> &read_2_ready_queue,
					 OutputStream &info_log_file,
					 uint64_t seq_num_reads,
					 uint64_t read_pair_num_offset,
					 RandomBitGenerator &rgen)
{
	uint64_t seq_read_num = 1;
	bool is_last = false;
	do {
		ReadPairSet *pair_set = read_pair_ready_queue.get();
		ReadSet *read_1_set = read_1_free_queue.get();
		ReadSet *read_2_set = read_2_free_queue.get();
		read_1_set->is_last = false;
		read_2_set->is_last = false;
		for (size_t i = 0; i < READS_PER_SET; i++) {
			ReadPair *pair = &pair_set->pairs[i];
			if (!pair->ref_seq_id) {
				read_1_set->reads[i] = NULL;
				read_2_set->reads[i] = NULL;
				if (seq_read_num + i > seq_num_reads) {
					is_last = true;
					read_1_set->is_last = true;
					read_2_set->is_last = true;
				}
				break;
			}
			if (rgen.next_bit()) {
				pair->reverse_order = true;
				read_1_set->reads[i] = &pair->read_2;
				read_2_set->reads[i] = &pair->read_1;
			} else {
				pair->reverse_order = false;
				read_1_set->reads[i] = &pair->read_1;
				read_2_set->reads[i] = &pair->read_2;
			}
			pair->pair_number = seq_read_num + i + read_pair_num_offset;
		}
		read_1_set->pair_set = pair_set;
		read_2_set->pair_set = pair_set;
		pair_set->set_refs(3);
		read_1_ready_queue.put(read_1_set);
		read_2_ready_queue.put(read_2_set);
		for (size_t i = 0; i < READS_PER_SET; i++, seq_read_num++) {
			ReadPair *pair = &pair_set->pairs[i];
			if (!pair->ref_seq_id)
				break;

			if (seq_read_num == seq_num_reads ||
					(seq_read_num % 25000 == 0 && seq_read_num != 1))
			{
				info("Simulated %"PRIu64" of %"PRIu64" read pairs\n",
						seq_read_num, seq_num_reads);
			}

			if (pair->reverse_order) {
				log_read_info(pair->read_2, info_log_file);
				log_read_info(pair->read_1, info_log_file);
			} else {
				log_read_info(pair->read_1, info_log_file);
				log_read_info(pair->read_2, info_log_file);
			}
		}
		if (pair_set->put_ref())
			read_pair_free_queue.put(pair_set);
	} while (!is_last);
}

/* This function is executed by the read writer threads.  There are two of these
 * threads, one for each of the output FASTA or FASTQ files.
 *
 * Each of these threads repeatedly retrieves a read set from its
 * read_ready_queue, writes the reads contained in it, and returns the read set
 * to the read_free_queue.  When we are done with the read set, we are also done
 * with the read pair, so we need to decrement the reference count on the read
 * pair (it may be shared with the other writer thread, and the info writer
 * thread) and return it to the read_pair_free_queue if all these threads are
 * done.
 *
 * Near the end of the simulation, the read sets may not be full; the end is
 * marked by a NULL read pointer.  The very last read set received has the
 * is_last flag set, so the writer knows when to terminate.
 */
static void read_writer_thread_proc(SharedQueue<ReadSet*> &read_free_queue,
				    SharedQueue<ReadSet*> &read_ready_queue,
				    SharedQueue<ReadPairSet*> &read_pair_free_queue,
				    OutputStream &out_file)
{
	bool is_last = false;
	do {
		ReadSet *set = read_ready_queue.get();
		for (size_t i = 0; i < READS_PER_SET; i++) {
			if (!set->reads[i]) {
				is_last = set->is_last;
				break;
			}
			output_read(*set->reads[i], out_file);
		}
		if (set->pair_set->put_ref())
			read_pair_free_queue.put(set->pair_set);
		read_free_queue.put(set);
	} while (!is_last);
}


#else /* ENABLE_THREADS */

/*
 * This function is compiled only if multiple threads are not supported.  It's a
 * bit redundant, but it's needed because it combines the functionality of the
 * read info writer thread and the two writer threads.  This function will write
 * the read pair @pair to both of the output files as well as log its
 * information to the read info log.
 */
static void output_read_pair(ReadPair &pair,
			     uint64_t seq_pair_num,
			     uint64_t seq_num_pairs,
			     SimulationFiles &files, 
			     RandomBitGenerator &bits)
{
	if (seq_pair_num == seq_num_pairs ||
			(seq_pair_num % 25000 == 0 && seq_pair_num != 1))
	{
		info("Simulated %"PRIu64" of %"PRIu64" read pairs\n",
				seq_pair_num, seq_num_pairs);
	}
	// We have a read 1 and read 2 ready to output.  But we should randomize
	// the output files they go to.  (In real data, we don't have all the
	// forward reads in one file; it's random).  We only need one bit of
	// randomness to do this.

	if (bits.next_bit()) {
		pair.reverse_order = false;
		log_read_info(pair.read_1, files.info_log_file);
		log_read_info(pair.read_2, files.info_log_file);
		output_read(pair.read_1, files.out_file_1);
		output_read(pair.read_2, files.out_file_2);
	} else {
		pair.reverse_order = true;
		log_read_info(pair.read_2, files.info_log_file);
		log_read_info(pair.read_1, files.info_log_file);
		output_read(pair.read_2, files.out_file_1);
		output_read(pair.read_1, files.out_file_2);
	}
}

#endif /* ENABLE_THREADS */

/*
 * This function is called to simulate reads on a reference sequence
 * (chromosome, contig, scaffold, etc.).  It's called by both the multithreaded
 * and single-threaded versions of pIRS, but this function is implemented
 * differently for each version.
 *
 * The multi-threaded version will initialize some queues containing sets of
 * Read and ReadPair structures, then fork off a number of threads to split up
 * the work.
 *
 * The single-threaded version, on the other hand, will initialize a single
 * ReadPair structure and then do all the work itself, repeatedly filling it in
 * with a simulated read pair by calling simulate_read_pair() and then writing
 * it by calling output_read().
 *
 * Both versions need to initialize the read pair(s) with some common variables
 * such as the reference sequence ID.
 *
 * Both versions need to initialize a random number generator based on the
 * random seed parameter (params.random_seed).  In the single-threaded version
 * this is done one time, but in the multi-threaded version, this is done once
 * for each thread, with the thread ID added to the random seed to make sure all
 * the threads are using a unique seed.
 *
 * The @counters parameter is a reference to a SimulationCounters structure that
 * keeps tracks of statistics for the simulation.  The single-threaded version
 * can update this structure directly.  The multi-threaded version creates a
 * temporary counter for each simulator thread, then combines all these counters
 * into the main counter after the threads have terminated.
 *
 * @read_pair_num_offset is the number of reads that have already been simulated
 * from other reference sequences, and equivalently the number that should be
 * added to the read pair numbers to make them continue going up from the
 * previous calls to this function.
 */
static uint64_t simulate_read_pairs(const char *ref_seq, size_t ref_seq_len, 
				    const char *ref_seq_id,
				    const char *ref_filename,
				    uint64_t read_pair_num_offset,
				    const SimulationParameters &params,
				    SimulationFiles &files,
				    const SimulationProfiles &profiles,
				    SimulationCounters &counters)
{
	bool any_non_dna_chars = seq_contains_non_dna_chars(ref_seq, ref_seq_len);
	uint64_t num_read_pairs = uint64_t(ref_seq_len * params.coverage / (2 * params.read_len));
	if (params.diploid)
		num_read_pairs /= params.num_input_refs;

#if ENABLE_THREADS
	size_t queue_size = params.num_simulator_threads * 8;
	SharedQueue<ReadPairSet*> read_pair_free_queue(queue_size);
	SharedQueue<ReadPairSet*> read_pair_ready_queue(queue_size);
	SharedQueue<ReadSet*> read_1_free_queue(queue_size);
	SharedQueue<ReadSet*> read_2_free_queue(queue_size);
	SharedQueue<ReadSet*> read_1_ready_queue(queue_size);
	SharedQueue<ReadSet*> read_2_ready_queue(queue_size);
	for (size_t i = 0; i < queue_size; i++) {
		ReadPairSet *pair_set = new ReadPairSet();
		for (size_t j = 0; j < READS_PER_SET; j++) {
			ReadPair *pair = &pair_set->pairs[j];
			pair->ref_seq_id      = ref_seq_id;
			pair->ref_filename    = ref_filename;
			pair->insert_len_mean = params.insert_len_mean;
			pair->quality_shift   = params.quality_shift;
			pair->cyclicized      = params.jumping;
		}
		read_pair_free_queue.put(pair_set);
		read_1_free_queue.put(new ReadSet());
		read_2_free_queue.put(new ReadSet());
	}
	SimulationCounters tl_counters[params.num_simulator_threads];
	for (size_t i = 0; i < ARRAY_LEN(tl_counters); i++)
		tl_counters[i].init(params.read_len);
	uint64_t pairs_remaining = num_read_pairs;
	Lock pairs_remaining_lock;
	#pragma omp parallel num_threads(params.num_simulator_threads + 3)
	{
		if (omp_get_num_threads() != params.num_simulator_threads + 3) {
			fatal_error("Not enough threads; is OpenMP disabled?\n"
				    "Compile with --disable-threads if you want"
				    "to run pIRS without multiple threads.");
		}
		int thread_num = omp_get_thread_num();
		RandomBitGenerator rgen(params.random_seed + thread_num);
		switch (thread_num) {
		case 0:
			read_info_writer_thread_proc(read_pair_free_queue,
						     read_pair_ready_queue,
						     read_1_free_queue,
						     read_1_ready_queue,
						     read_2_free_queue,
						     read_2_ready_queue,
						     files.info_log_file,
						     num_read_pairs,
						     read_pair_num_offset,
						     rgen);
			break;
		case 1:
			read_writer_thread_proc(read_1_free_queue,
						read_1_ready_queue,
						read_pair_free_queue,
						files.out_file_1);
			break;
		case 2:
			read_writer_thread_proc(read_2_free_queue,
						read_2_ready_queue,
						read_pair_free_queue,
						files.out_file_2);
			break;
		default:
			read_simulator_thread_proc(ref_seq, ref_seq_len,
						   any_non_dna_chars,
						   params,
						   profiles,
						   tl_counters[thread_num - 3],
						   read_pair_free_queue,
						   read_pair_ready_queue,
						   pairs_remaining,
						   pairs_remaining_lock,
						   rgen);
			break;
		}
	}

	// Combine the counters together.
	for (size_t i = 0; i < ARRAY_LEN(tl_counters); i++)
		counters.add(tl_counters[i]);
#else // !ENABLE_THREADS

	// Single-threaded version of simulate_read_pairs()

	RandomBitGenerator rgen(params.random_seed);
	ReadPair pair;
	pair.ref_seq_id      = ref_seq_id;
	pair.ref_filename    = ref_filename;
	pair.insert_len_mean = params.insert_len_mean;
	pair.quality_shift   = params.quality_shift;
	pair.cyclicized      = params.jumping;

	for (uint64_t i = 0; i < num_read_pairs; i++) {
		while (!simulate_read_pair(pair, ref_seq, ref_seq_len,
					   any_non_dna_chars,
					   params, profiles, counters,
					   rgen))
			;

		pair.pair_number = i + 1 + read_pair_num_offset;
		output_read_pair(pair, i + 1, num_read_pairs, files, rgen);
	}
#endif // !ENABLE_THREADS

	// Note: in the multi-threaded version, the SharedQueues will be
	// destructed here.
	return num_read_pairs;
}


// Calculate and return the mean of the insert length distribution.
static double dist_mean(const map<int, uint64_t> &distr)
{
	map<int, uint64_t>::const_iterator it;
	uint64_t sum = 0;
	uint64_t count = 0;
	for (it = distr.begin(); it != distr.end(); it++) {
		sum += it->first * it->second;
		count += it->second;
	}
	return double(sum) / double(count);
}

// Calculate and return the standard deviation of the insert length
// distribution.
static double dist_sd(const map<int, uint64_t> &distr, double mean)
{
	map<int, uint64_t>::const_iterator it;
	double sq_dev_sum = 0.0;
	uint64_t count = 0;
	for (it = distr.begin(); it != distr.end(); it++) {
		double dev = it->first - mean;
		sq_dev_sum += (dev * dev) * double(it->second);
		count += it->second;
	}
	double variance = sq_dev_sum / (count - 1);
	return sqrt(variance);
}

// Output the insert length distribution to the insert length distribution log
// file (*.insert_len.distr{,.gz})
static void log_insert_len_distr(OutputStream &insert_log,
				 map<int, uint64_t> &insert_len_distr,
				 double insert_len_mean, double insert_len_sd)
{
	double mean = dist_mean(insert_len_distr);
	double sd = dist_sd(insert_len_distr, mean);

	insert_log.printf(
	"#\n"
	"# This file shows the length distribution of the simulated inserts.\n"
	"# We were trying to simulate inserts with a mean length of %g and a\n"
	"# standard deviation of %g.  The actual mean is %g, and the actual\n"
	"# standard deviation is %g.\n"
	"#\n"
	"# insert_len\tfrequency\n",
		insert_len_mean, insert_len_sd, mean, sd);

	map<int, uint64_t>::const_iterator it;
	for (it = insert_len_distr.begin(); it != insert_len_distr.end(); it++)
		insert_log.printf("%d\t%"PRIu64"\n", it->first, it->second);
}

// Output the error position distribution to the error position distribution log
// file. (*.error_pos.distr{,.gz})
static void log_error_pos_distr(OutputStream &error_log,
				const uint64_t error_pos_distr[],
				const SimulationParameters &params,
				const BaseCallingProfile &profile,
				uint64_t num_read_pairs)
{
	error_log.printf(
	"#\n"
	"# This file shows the error position distribution of the simulated reads.\n"
	"# The cycle number indicates the position in the two reads.  The frequency\n"
	"# and average rate of errors in each position is provided.\n"
	"#\n"
	"# The total number of read pairs that were simulated was %"PRIu64".\n"
	"# Each read was %d base pairs long, so there were %d cycles.\n"
	"# Substitution errors were simulated using the %s,\n",
		num_read_pairs, params.read_len, params.read_len * 2,
		get_subst_error_algo_name(params.subst_error_algo));
	if (params.error_rate == -1.0) {
		error_log.printf("# using the default error rate of the base-calling profile:\n"
				 "# \"%s\"\n", profile.filename.c_str());
	} else {
		error_log.printf("# using the base-calling profile \"%s\",\n"
				 "# modified to use the user-supplied error rate of %g.\n",
				 profile.filename.c_str(), params.error_rate);
	}
	error_log.puts("#\n");
	uint64_t total_errors = 0;
	error_log.puts("# Cycle\tError_frequency\tError_rate\n");
	for (int i = 0; i < 2 * params.read_len; i++) {
		error_log.printf("%d\t%"PRIu64"\t%g\n",
				 i + 1, error_pos_distr[i],
				 error_pos_distr[i] / double(num_read_pairs));
		total_errors += error_pos_distr[i];
	}
	error_log.printf("total\t%"PRIu64"\t%g\n",
			 total_errors,
			 total_errors / (double(num_read_pairs) * 2 * params.read_len));
}

static void log_parameter(OutputStream &info_log, const char *format, ...)
{
	va_list va;
	va_list va_2;
	va_start(va, format);
	va_copy(va_2, va);
	if (info_log.is_open()) {
		info_log.puts("# ");
		info_log.vprintf(format, va);
	}
	info(format, va_2);
	va_end(va);
	va_end(va_2);
}

/*
 * Writes the header for the read info log file (*.read_info{,.gz}).  This
 * header includes a listing of many of the read parameters, many of which we
 * also want to log to stdout/stderr using the info() function.  This is done
 * using a log_parameter() function that writes to both the log file and the
 * info() function.
 */
static void begin_info_log(OutputStream &info_log,
			   const SimulationParameters &params,
			   const SimulationProfiles &profiles)
{
	if (info_log.is_open()) {
		info_log.puts(
		"# \n"
		"# This file is a log of every read that was simulated.  It shows\n"
		"# exactly where each read came from and the substitution errors,\n"
		"# insertions, and deletions (if any) that were made to it.\n"
		"#\n"
		"# The following lists the parameters of the simulation:\n"
		"#\n"
		"# Input reference sequence files:   ");

		unsigned i;

		for (i = 0; i < params.num_input_refs - 1; i++)
			info_log.printf("%s, ", params.input_refs[i]);
		info_log.printf("%s\n", params.input_refs[i]);
	}

	info("\n");
	info("Beginning simulation with the following parameters:\n");
	info("\n");

	log_parameter(info_log, "Read length:                      %d\n",
		      params.read_len);
	log_parameter(info_log, "Insert length mean:               %g\n",
		      params.insert_len_mean);
	log_parameter(info_log, "Insert length standard deviation: %g\n",
		      params.insert_len_sd);
	log_parameter(info_log, "Coverage:                         %g\n",
		      params.coverage);
	log_parameter(info_log, "Diploid:                          %s\n",
		      bool_to_str(params.diploid));
	log_parameter(info_log, "Cyclized (jumping library):       %s\n", 
		      bool_to_str(params.jumping));
	log_parameter(info_log, "Simulate substitution errors:     %s\n",
		      bool_to_str(params.simulate_substitution_errors));
	if (params.error_rate == -1.0)
		log_parameter(info_log, "Substitution error rate:          default of base-calling profile\n");
	else
		log_parameter(info_log, "Substitution error rate:          %g\n",
			      params.error_rate);
	log_parameter(info_log, "Base-calling profile:             %s\n",
		      params.simulate_substitution_errors ? 
				profiles.base_calling_profile->filename.c_str()
				: "(None)");
	log_parameter(info_log, "Substitution error algorithm:     %s\n",
		      params.simulate_substitution_errors ? 
				get_subst_error_algo_name(params.subst_error_algo)
					: "(None)");
	log_parameter(info_log, "Simulate InDel errors:            %s\n",
		      bool_to_str(params.simulate_indels));
	log_parameter(info_log, "InDel error profile:              %s\n",
		      params.simulate_indels ?
				profiles.indel_profile->filename.c_str()
				: "(None)");
	log_parameter(info_log, "Simulate GC content bias:         %s\n",
		      bool_to_str(params.simulate_gc_bias));
	log_parameter(info_log, "GC bias profile:                  %s\n",
		      params.simulate_gc_bias ?
				profiles.gc_bias_profile->filename.c_str()
				: "(None)");
	log_parameter(info_log, "Output type:                      %s\n",
			OutputStream::get_default_file_type_str());
	log_parameter(info_log, "Output prefix:                    %s\n",
			params.output_prefix.c_str());
	log_parameter(info_log, "Simulate quality values:          %s\n",
			bool_to_str(params.simulate_quality_values));
		
	if (params.simulate_quality_values) {
		log_parameter(info_log, "ASCII shift of quality value      %d\n",
				params.quality_shift);
	}
	log_parameter(info_log, "Mode of mask quality:             %s\n",
			casava::demultiplex::get_quality_mask_mode_name(params.quality_mask_mode));

	log_parameter(info_log, "Random seed:                      %"PRIu64"\n",
			params.random_seed);
#ifdef ENABLE_THREADS
	log_parameter(info_log, "Number of simulator threads:      %d\n",
			params.num_simulator_threads);
#else
	log_parameter(info_log, "Number of simulator threads:      1 (no support for threads)\n");
#endif
	if (info_log.is_open()) {
		info_log.puts("#\n"
			      "# readId\treferenceFile\tcontig/scaffold/chromosome\t"
			      "position\torientation\tinsertSize\tmaskEndLen\t"
			      "substitutions\tinsertions\tdeletions\n");

		info("\n");
	}
}

/*
 * This is the main procedure that is executed when the `pirs simulate' command
 * is given.
 */
void pirs_simulate(int argc, char *argv[])
{
	time_t start_time = time(NULL);

	SimulationParameters params(argc, argv);

	info("Program:        "PROGRAM "\n");
	info("Version:        "VERSION "\n");
	info("Author:         "AUTHOR "\n");
	info("Contact:        "CONTACT "\n");
	info("Compile Date:   "__DATE__ " time: " __TIME__ "\n");
	info("Current time:   %s", ctime(&start_time));
	info("Command line:   %s\n", get_command_line());
	info("\n");

	SimulationFiles files(params);
	SimulationProfiles profiles(params);


	begin_info_log(files.info_log_file, params, profiles);

	string id;
	vector<char> seq;

	SimulationCounters counters(params.read_len);

	/* Main loop of the program; go through each input FASTA/FASTQ file and
	 * simulate reads from each sequence in it.  */
	uint64_t num_read_pairs = 0;
	uint64_t total_seq_len = 0;
	for (unsigned i = 0; i < params.num_input_refs; i++) {
		while (read_scaffold(files.in_files[i], id, seq)) {
			info("Simulating reads from scaffold \"%s\" (length = %zu)\n", 
						id.c_str(), seq.size());
			num_read_pairs += simulate_read_pairs(&seq[0],
						    	      seq.size(),
							      id.c_str(),
							      params.input_refs[i],
							      num_read_pairs,
							      params,
							      files,
							      profiles,
							      counters);
			info("Done simulating reads from scaffold \"%s\" (length = %zu)\n", 
						id.c_str(), seq.size());
			total_seq_len += seq.size();
		}
	}

	info("\n");
	info("Simulation complete (%lu seconds elapsed)\n",
			time(NULL) - start_time);
	info("\n");

	uint64_t num_bases = num_read_pairs * params.read_len * 2;
	uint64_t num_subst_errors = sum_array(counters.error_pos_distr, params.read_len * 2);

	info("Bases in reference sequences:    %"PRIu64"\n",
			total_seq_len);
	info("Read pairs simulated:            %"PRIu64"\n",
			num_read_pairs);
	info("Bases in reads:                  %"PRIu64"\n",
			num_bases);
	info("Coverage:                        %.2f\n",
			double(num_bases) / double(total_seq_len)
			* (params.diploid ? params.num_input_refs : 1.0));
	info("Substitution error count:        %"PRIu64"\n",
			num_subst_errors);
	info("Average substitution error rate: %.3f%%\n",
			100 * double(num_subst_errors) / num_bases);
	info("Insertion count:                 %"PRIu64"\n",
			counters.ins_count);
	info("Deletion count:                  %"PRIu64"\n",
			counters.del_count);
	info("Average insertion rate:          %.5f%%\n",
			100 * double(counters.ins_count) / num_bases);
	info("Average deletion rate:           %.5f%%\n",
			100 * double(counters.del_count) / num_bases);
	info("Average insertion length:        %.2f\n",
			counters.ins_count ? 
				double(counters.ins_sum) / counters.ins_count
				: 0.0);
	info("Average deletion length:         %.2f\n",
			counters.del_count ? 
				double(counters.del_sum) / counters.del_count
				: 0.0);
	info("Fragments affected by GC bias:   %.2f%%\n",
			100 * double(counters.discarded_inserts) / double(counters.num_inserts));
	info("Bases masked by EAMSS algorithm: %"PRIu64"\n", counters.masked_bases);
	info("\n");

	info("The simulated reads are in the files:\n");
	info("    %s\n", files.out_file_1.s_filename);
	info("    %s\n", files.out_file_2.s_filename);

	if (files.info_log_file.is_open()) {
		info("\n");
		info("Information about each simulated read has been logged to "
			"the file:\n");
		info("    %s\n", files.info_log_file.s_filename);
	}

	if (files.insert_distr_log_file.is_open()) {
		log_insert_len_distr(files.insert_distr_log_file,
				     counters.insert_len_distr,
				     params.insert_len_mean,
				     params.insert_len_sd);
		info("\n");
		info("The insert length distribution has been logged to the "
			"file:\n");
		info("    %s\n", files.insert_distr_log_file.s_filename);
	}

	if (files.error_distr_log_file.is_open()) {
		log_error_pos_distr(files.error_distr_log_file,
				    counters.error_pos_distr,
				    params,
				    *profiles.base_calling_profile,
				    num_read_pairs);
		info("\n");
		info("The error position distribution has been logged to the "
			"file:\n");
		info("    %s\n", files.error_distr_log_file.s_filename);
	}

	// Warn the user about some possible problems with the parameters they
	// ran the simulation with.
	if (params.num_input_refs == 2 && !params.diploid) {
		info("\n");
		info("Note: Exactly two input reference sequences were supplied, but the `--diploid'\n");
		info("option was not provided.  By default, we assume that the input sequences are\n");
		info("haploid, so we simulate reads at full coverage from them.  If you intended to\n");
		info("simulate reads from a diploid genome, you should have used a coverage level\n");
		info("half what you wanted, or else provided the `--diploid' option to let us\n");
		info("know that we should simulate reads at half coverage.\n");
	}
	if (params.simulate_quality_values && !params.simulate_substitution_errors) {
		info("\n");
		info("Note: The substitution error rate was set to 0, but we still had to simulate\n");
		info("quality values to write the FASTQ files.  The quality values are all set\n");
		info("to 40 (letter '%c') since without substitution errors, we didn't have a\n",
					40 + params.quality_shift);
		info("base-calling profile available to simulate the quality values.  If this isn't\n");
		info("what you wanted, you should specify a non-zero error rate, or else provide the\n");
		info("`--no-quality-vals' option so that we can write a FASTA file with no quality\n");
		info("values.\n");
	}
	if (params.user_specified_random_seed && params.num_simulator_threads > 1) {
		info("\n");
		info("Note: A random seed (%"PRIu64") was specified on the command line.\n",
					params.random_seed);
		info("However, we ran with %d simulator threads.  Because there was more than one\n",
					params.num_simulator_threads);
		info("simulator thread, the results of the simulation will not be exactly\n");
		info("reproducible, even if specify the same random seed again.  If you want to\n");
		info("make the simulation exactly reproducible, you should specify --threads=1.\n");
	}
	if (params.quality_shift != 33 && params.quality_shift != 64) {
		info("\n");
		info("Note: quality_shift was set to %d, but it usually is 33 or 64!",
				params.quality_shift);
	}

	// The SimulationFiles and SimulationProfiles structures are
	// destructed here.  This results in the input and output files being
	// closed, and dynamically allocated memory for the profiles being
	// freed.
}
